Cleaned data

Imported and cleaned all kiln data available from 2018-2020.

Involved using an algorithm to remove high peaks and valleys, detection of when the “start” of a run was based on setpoint and kiln temperature increases. We now have mostly clean plots with a few exceptions.

Assorted

Example of setpoint vs average kiln temperature readings from assorted lots from each kiln.

all_kilns <- bind_rows(
  kilns_AB %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
  kilns_C  %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
  kilns_D  %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
  kilns_E  %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
  kilns_F  %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
  kilns_G  %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
  kilns_H  %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln)
)

# random sample of LOTNOs
set.seed(505)
n_kilns <- sample_n(all_kilns, 16) %>% dplyr::select(LOTNO) %>% unlist()

n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "A"), 2) %>% dplyr::select(LOTNO) %>% unlist()
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "B"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "C"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "D"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "E"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "F"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "G"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "H"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- unlist(n_kilns)

sample_kilns <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns, lot_compare=T)

A

# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "A"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_a <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_a)

B

# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "B"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_b <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_b)

C

# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "C"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_c <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_c)

D

# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "D"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_d <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_d)

E

# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "E"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_e <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_e)

F

# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "F"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_f <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_f)

G

# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "G"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_g <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_g)

H

# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "H"), 16) %>% dplyr::select(LOTNO) %>% unlist()

sample_kilns_h <- all_kilns %>% 
  dplyr::filter(LOTNO %in% n_kilns) %>% 
  mutate(LOTNO = as.character(LOTNO)) %>% 
  mutate(LOTNO = factor(LOTNO)) %>% 
  mutateAucValues()

plot_range(sample_kilns_h)

Closer look

plot_range(sample_kilns_h, filter = "010819H", plotly_on = TRUE)

AUC Calculations

Feature engineering: using domain knowledge to extract features from raw data.

One such feature: variation between setpoint and average kiln temperatures between in 400°C to 600°C range. Algorithm was developed and deployed on all kiln data to produce a numeric value labeled AUC or aucDiff (area under curves, area under curves difference, area between the curves, etc…)

Algorithm overview

AUC

Base plot of temperature and setpoint over time, with green area representing the calculated area between the two curves. Numeric values also printed for comparison.

plotAucValues(sample_kilns, x.nudge = 900, y.nudge = 0)

Cropped

plotAucValues(sample_kilns, crop=T, x.nudge = 0, y.nudge = 200)

Cropped, unlabeled

plotAucValues(sample_kilns, crop=T, free.x=T)

Value distribution

Should be noted that the extracted values differ significantly between kilns. If we want to compare metrics across kilns in the future, all values will need to be scaled with one another.

Some analyses may also require normalization due to the non-normal distributions.

Boxplot

AUC values differ significantly between kilns.

df_merged_auc %>% 
  group_by(LOTNO) %>% slice(1) %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  # ggplot(aes(x=aucDiff, y=fct_reorder(KILN,aucDiff), fill =KILN2))+
  ggplot(aes(x=aucDiff, y=fct_reorder(KILN2,aucDiff)))+
  geom_boxplot(outlier.alpha = 0,
               outlier.shape = 21)+
  geom_jitter(height = .2, alpha=.1)+
  labs(title = "Setpoint vs temperature variation between kilns")+
  xlab("Area between curves")+
  ylab("Kiln")+
  theme(legend.position = 'none')+
  scale_x_continuous(labels = scales::label_number())

Density

Non-normal, mostly skewed distributions.

df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  ggplot(aes(x=aucDiff, y = ..count../sum(..count..)))+
  geom_density()+
  scale_y_continuous(labels = scales::percent_format())+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  facet_wrap(~KILN2, scales='free')

  # facet_wrap(~KILN2)

vs Lot yields

The easiest analysis to perform is a simple comparison of AUC vs Lot yield on a per kiln basis.

This is frankly not expected to be useful as we know compositions and items, among other things may behave quite differently. Lumping them alltogether is a decent start, however.

The low correlation values less than 0.3 indicate weak correlation.

# join correlation of AUC, lot yield to original DF and plot
df <- df_yields_auc %>% 
  group_by(LOTNO, KILN, aucDiff, temp_avg, precip, snow_fall, snow_depth) %>% 
  dplyr::summarise(
    total_fired = sum(TOTAL_ITEM_FIRED),
    total_rejected = sum(TOTAL_ITEM_REJECTED),
    pct_lot_yield = (total_fired - total_rejected) / total_fired
  ) %>% 
  mutate(KILN2 = str_replace(KILN, "R", ""))

df <- df %>% 
  group_by(KILN2) %>% 
  dplyr::summarise(cor = cor(pct_lot_yield, aucDiff)) %>% 
  left_join(df) %>% 
  mutate(kiln_cor = factor(paste0(KILN2, " (", round(cor,2), ")")))

df %>% 
  ggplot(aes(x=aucDiff, y=pct_lot_yield))+
  geom_smooth(alpha=.1, color='red',method='lm')+
  geom_pointdensity(alpha=.8, size=1)+
  scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  facet_wrap(~kiln_cor, scales='free')+
  ylab('Lot yield')+
  xlab('Area between curves')+
  labs(title = 'AUC versus entire lot yields',
       subtitle = 'Correlation value (in parentheses)')+
  theme(legend.position = 'none')

# 
# 
# df %>% count(cor, KILN2) %>%  
#   arrange(-abs(cor)) %>% 
#   mutate(
#     cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
#          ) %>% 
#   set_colnames(c("Correlation", "Kiln", "Observations")) %>% 
#   kable(format = 'html', escape = 'F') %>% 
#   kable_styling('striped',full_width = F)

vs Item yields, by Kiln

Different items may react differently to different AUC values. We can extract the top items (most lot numbers associated with) from each kiln and generate a plot similar to above.

Is the AUC feature related to individual item yields within each kiln?

A

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "A")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
  geom_smooth(alpha=.1, color='red',method='lm')+
  geom_pointdensity()+
  # scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::percent_format())+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  ylab('Item yield')+
  xlab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
  theme(legend.position = 'none',
        strip.text.x = element_text(size = 8))

B

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "B")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
  geom_smooth(alpha=.1, color='red',method='lm')+
  geom_pointdensity()+
  # scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::percent_format())+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  ylab('Item yield')+
  xlab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
  theme(legend.position = 'none',
        strip.text.x = element_text(size = 8))

C

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "C")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
  geom_smooth(alpha=.1, color='red',method='lm')+
  geom_pointdensity()+
  # scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::percent_format())+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  ylab('Item yield')+
  xlab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
  theme(legend.position = 'none',
        strip.text.x = element_text(size = 8))

# # table
# df %>% 
#   count(cor, DESCRIPTION) %>% 
#   arrange(-abs(cor)) %>% 
#   mutate(
#     cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
#   ) %>% 
#   set_colnames(c("Correlation", "Description", "Observations")) %>% 
#   kable(format = 'html', escape = 'F') %>% 
#   kable_styling('striped',full_width = F)

D

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "D")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
  geom_smooth(alpha=.1, color='red',method='lm')+
  geom_pointdensity()+
  # scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::percent_format())+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  ylab('Item yield')+
  xlab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
  theme(legend.position = 'none',
        strip.text.x = element_text(size = 8))

E

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "E")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
  geom_smooth(alpha=.1, color='red',method='lm')+
  geom_pointdensity()+
  # scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::percent_format())+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  ylab('Item yield')+
  xlab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
  theme(legend.position = 'none',
        strip.text.x = element_text(size = 8))

F

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "F")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
  geom_smooth(alpha=.1, color='red',method='lm')+
  geom_pointdensity()+
  # scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::percent_format())+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  ylab('Item yield')+
  xlab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
  theme(legend.position = 'none',
        strip.text.x = element_text(size = 8))

G

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "G")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
  geom_smooth(alpha=.1, color='red', method='lm')+
  geom_pointdensity()+
  # scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::percent_format())+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  ylab('Item yield')+
  xlab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
  theme(legend.position = 'none',
        strip.text.x = element_text(size = 8))

H

# yields df of kiln
df <- df_yields_auc %>% 
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  dplyr::filter(KILN2 == "H")

# get top items fired in kiln
df_items <- df %>% 
  count(DESCRIPTION) %>% 
  arrange(-n) %>% 
  slice(1:9)

# filter original df for top items
df <- df %>% 
  dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)

# get cor values and join to original
df_cor <- df %>% 
  group_by(DESCRIPTION) %>% 
  dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
  left_join(df) %>% 
  dplyr::select(DESCRIPTION, cor) %>% 
  group_by(DESCRIPTION) %>% slice(1)

df <- df %>% 
  left_join(df_cor) %>% 
  mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>% 
  right_join(df)

# plot
df %>% 
  ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
  geom_smooth(alpha=.1, color='red',method='lm')+
  geom_pointdensity()+
  # scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
  scale_y_continuous(labels = scales::percent_format())+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_color_viridis_c()+
  ylab('Item yield')+
  xlab('Area between curves')+
  labs(title = 'AUC versus item yields')+
  facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
  theme(legend.position = 'none',
        strip.text.x = element_text(size = 8))

vs Defect rates

For example: does a higher AUC value seem to impact the rate of cracked webs observed per lot?

CW

# get lot fired total
df <- df_merged_auc %>%
  # lump kilns
  mutate(KILN2 = str_replace(KILN, "R", "")) %>% 
  mutate(reject_count_single_row = reject_vol_single_row_D * vol_piece) %>% 
  group_by(LOTNO) %>% 
  dplyr::summarise(total_lot_count_fired = sum(total_item_count_fired_D)) %>% 
  right_join(df_merged_auc)

# get defects total
df <- df %>% 
  group_by(LOTNO, CAUSE) %>%
  dplyr::summarise(total_defect_count_per_lot = sum(total_item_count_rejected_D)) %>% 
  right_join(df) %>% 
  group_by(LOTNO, CAUSE) %>%
  slice(1) %>% ungroup() %>%
  dplyr::select(LOTNO, CAUSE, total_lot_count_fired, total_defect_count_per_lot, aucDiff) %>% 
  mutate(pct_defect = 1 - (total_lot_count_fired - total_defect_count_per_lot)  / total_lot_count_fired )
  
# fill missing values
pct_defect_by_lot <- df %>% 
  pivot_wider(id_cols     = LOTNO, 
              names_from  = CAUSE, 
              values_from = pct_defect,
              values_fill = 0)

# join pct defect to aucDiff
pct_defect_by_lot <- pct_defect_by_lot %>% 
  pivot_longer(cols = BE:BIT) %>% 
  # join to aucDiff 
  left_join(
    df_merged_auc %>% 
      mutate(KILN = str_replace(KILN, "R", "")) %>% 
      group_by(LOTNO) %>% slice(1) %>% 
      dplyr::select(LOTNO, KILN, aucDiff)
  ) %>% 
  set_colnames(c("LOTNO", "CAUSE", "defect_pct", "KILN", "aucDiff")) %>% 
  mutate_if(is.character, factor)

# for CW, what is the relationship between KILN and aucDiff?
def = "CW"

pct_defect_by_lot %>%
  dplyr::filter(CAUSE == def) %>% 
  group_by(KILN) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(CAUSE == def)) %>% 
  dplyr::mutate(kiln_cor = paste0(KILN, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2,color='red',method='lm')+
  geom_pointdensity()+
  scale_color_viridis_c()+
  facet_wrap(~kiln_cor,scales='free')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.15))+
  labs(
    title = paste0(def, " defect rate per lot vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0(def, " defect rate"))+
  theme(legend.position = 'none')

BE

def = "BE"

pct_defect_by_lot %>%
  dplyr::filter(CAUSE == def) %>% 
  group_by(KILN) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(CAUSE == def)) %>% 
  dplyr::mutate(kiln_cor = paste0(KILN, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2, color='red',method='lm')+
  geom_pointdensity()+
  scale_color_viridis_c()+
  facet_wrap(~kiln_cor,scales='free')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.2))+
  labs(
    title = paste0(def, " defect rate per lot vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0(def, " defect rate"))+
  theme(legend.position = 'none')

DC

def = "DC"

pct_defect_by_lot %>%
  dplyr::filter(CAUSE == def) %>% 
  group_by(KILN) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(CAUSE == def)) %>% 
  dplyr::mutate(kiln_cor = paste0(KILN, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2,color='red',method='lm')+
  geom_pointdensity()+
  scale_color_viridis_c()+
  facet_wrap(~kiln_cor, scales = 'free')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.1))+
  scale_y_continuous(labels = scales::percent_format())+
  labs(
    title = paste0(def, " defect rate per lot vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0(def, " defect rate"))+
  theme(legend.position = 'none')

vs Defect rates by kiln

Within a specific kiln, how do the defect types and rates change with the AUC feature?

A

# for each kiln, compare the defect rate vs AUC for each cause
kil = "A"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2,color='red',method='lm')+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free_y')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))+
  theme(legend.position = 'none')

B

# for each kiln, compare the defect rate vs AUC for each cause
kil = "B"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2,color='red',method='lm')+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free_y')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))+
  theme(legend.position = 'none')

C

# for each kiln, compare the defect rate vs AUC for each cause
kil = "C"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2,color='red',method='lm')+
  geom_pointdensity(adjust = .1)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free_y')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))+
  theme(legend.position = 'none')

D

# for each kiln, compare the defect rate vs AUC for each cause
kil = "D"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2,color='red',method='lm')+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free_y')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))+
  theme(legend.position = 'none')

E

# for each kiln, compare the defect rate vs AUC for each cause
kil = "E"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2,color='red',method='lm')+
  geom_pointdensity(adjust = .3)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free_y')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))+
  theme(legend.position = 'none')

F

# for each kiln, compare the defect rate vs AUC for each cause
kil = "F"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2,color='red',method='lm')+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free_y')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))+
  theme(legend.position = 'none')

G

# for each kiln, compare the defect rate vs AUC for each cause
kil = "G"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2,color='red',method='lm')+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free_y')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))+
  theme(legend.position = 'none')

H

# for each kiln, compare the defect rate vs AUC for each cause
kil = "H"

pct_defect_by_lot %>%
  dplyr::filter(KILN == kil) %>% 
  group_by(CAUSE) %>%
  dplyr::summarise(cor = cor(defect_pct, aucDiff),
                   cor = ifelse(is.na(cor), 0, cor)) %>%
  # arrange(-cor) %>%
  right_join(pct_defect_by_lot %>%
               dplyr::filter(KILN == kil)) %>% 
  dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
  ggplot(aes(y=defect_pct, x=aucDiff))+
  geom_smooth(alpha=.2,color='red',method='lm')+
  geom_pointdensity(adjust = .2)+
  scale_color_viridis_c()+
  facet_wrap(~cause_cor, scales='free_y')+
  scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
  scale_y_continuous(labels = scales::percent_format())+
  # scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
  labs(
    title = paste0("Kiln ", kil, " defect rate vs AUC"),
    subtitle = "Correlation value (in parentheses)"
  )+
  xlab("Area between setpoint, kiln temp")+
  ylab(paste0("Defect rate"))+
  theme(legend.position = 'none')